@@ -19,13 +19,15 @@ extern "C" {

    struct ggml_backend_buffer_type_i {
        const char * (*GGML_CALL get_name)(ggml_backend_buffer_type_t buft);
+        // allocate a buffer of this type
        ggml_backend_buffer_t (*GGML_CALL alloc_buffer)(ggml_backend_buffer_type_t buft, size_t size);
-        size_t (*GGML_CALL get_alignment)(ggml_backend_buffer_type_t buft); // tensor alignment
-        size_t (*GGML_CALL get_max_size)(ggml_backend_buffer_type_t buft); // allocation max size
-        size_t (*GGML_CALL get_alloc_size)(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor); // data size needed to allocate the tensor, including padding
-        bool (*GGML_CALL supports_backend)(ggml_backend_buffer_type_t buft, ggml_backend_t backend); // check if the buffer type is usable by the backend
+        // tensor alignment
+        size_t (*GGML_CALL get_alignment)(ggml_backend_buffer_type_t buft);
+        // max buffer size that can be allocated
+        size_t (*GGML_CALL get_max_size)(ggml_backend_buffer_type_t buft);
+        // data size needed to allocate the tensor, including padding
+        size_t (*GGML_CALL get_alloc_size)(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor);
        // check if tensor data is in host memory
-        // should be equivalent to supports_backend(buft, ggml_backend_cpu_init())
        bool (*GGML_CALL is_host)(ggml_backend_buffer_type_t buft);
    };

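The hunk above moves each trailing comment onto its own line and drops supports_backend from the buffer-type interface; per the next hunk, backend compatibility is now reported through the new supports_buft callback on the backend itself. As a minimal sketch (not code from this PR), a hypothetical backend could populate the updated struct as shown below; every example_* symbol and constant is a placeholder.

// Sketch only: a hypothetical backend filling in the updated
// ggml_backend_buffer_type_i. Assumes the ggml backend headers are available;
// all example_* names are placeholders, not part of ggml.
#include <stddef.h>
#include <stdint.h>
#include "ggml-backend-impl.h"

static const char * GGML_CALL example_buft_get_name(ggml_backend_buffer_type_t buft) {
    (void) buft;
    return "EXAMPLE";
}

static size_t GGML_CALL example_buft_get_alignment(ggml_backend_buffer_type_t buft) {
    (void) buft;
    return 64; // placeholder alignment
}

static size_t GGML_CALL example_buft_get_max_size(ggml_backend_buffer_type_t buft) {
    (void) buft;
    return SIZE_MAX; // no practical limit in this sketch
}

static size_t GGML_CALL example_buft_get_alloc_size(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor) {
    (void) buft;
    return ggml_nbytes(tensor); // no extra padding in this sketch
}

static bool GGML_CALL example_buft_is_host(ggml_backend_buffer_type_t buft) {
    (void) buft;
    return true;
}

static const struct ggml_backend_buffer_type_i example_buft_iface = {
    /* .get_name       = */ example_buft_get_name,
    /* .alloc_buffer   = */ NULL, // a real backend must implement this; omitted to keep the sketch short
    /* .get_alignment  = */ example_buft_get_alignment,
    /* .get_max_size   = */ example_buft_get_max_size,
    /* .get_alloc_size = */ example_buft_get_alloc_size,
    /* .is_host        = */ example_buft_is_host,
    // note: no .supports_backend member anymore; compatibility is reported
    // via the backend's supports_buft callback (see the next hunk)
};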
@@ -94,27 +96,37 @@ extern "C" {
        void (*GGML_CALL synchronize)(ggml_backend_t backend);

        // compute graph with a plan (not used currently)
+        // create a new plan for a graph
        ggml_backend_graph_plan_t (*GGML_CALL graph_plan_create)(ggml_backend_t backend, const struct ggml_cgraph * cgraph);
        void (*GGML_CALL graph_plan_free)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+        // update the plan with a new graph - this should be faster than creating a new plan when the graph has the same topology
+        void (*GGML_CALL graph_plan_update)(ggml_backend_t backend, ggml_backend_graph_plan_t plan, const struct ggml_cgraph * cgraph);
+        // compute the graph with the plan
+        enum ggml_status (*GGML_CALL graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);

-        // compute graph with a plan
-        enum ggml_status (*GGML_CALL graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
        // compute graph without a plan (async)
        enum ggml_status (*GGML_CALL graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);

-        // check if the backend supports an operation
+        // check if the backend can compute an operation
        bool (*GGML_CALL supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);

+        // check if the backend can use tensors allocated in a buffer type
+        bool (*GGML_CALL supports_buft)(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
+
        // check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
        // these should be expensive operations with large batch sizes that may benefit from running on this backend
        // even if the weight has to be copied from the CPU temporarily
        bool (*GGML_CALL offload_op)(ggml_backend_t backend, const struct ggml_tensor * op);

        // (optional) event synchronization
+        // create a new event that can record events on this backend instance
        ggml_backend_event_t (*GGML_CALL event_new)(ggml_backend_t backend);
        void (*GGML_CALL event_free)(ggml_backend_event_t event);
+        // record an event on the backend instance that created it
        void (*GGML_CALL event_record)(ggml_backend_event_t event);
+        // wait for an event on a different backend instance
        void (*GGML_CALL event_wait)(ggml_backend_t backend, ggml_backend_event_t event);
+        // block until an event is recorded
        void (*GGML_CALL event_synchronize)(ggml_backend_event_t event);
    };

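The event callbacks above only define the backend-facing contract. As a usage sketch, assuming the public one-to-one wrappers (ggml_backend_event_new, _record, _wait, _synchronize, _free) and ggml_backend_graph_compute_async exist with matching shapes in this revision, ordering work across two backend instances could look like this:

// Sketch only: ordering work between two backend instances with events.
// Assumes public ggml_backend_event_* and ggml_backend_graph_compute_async
// wrappers that mirror the callbacks above; not taken from this PR.
#include "ggml-backend.h"

static void example_pipeline(ggml_backend_t producer, ggml_backend_t consumer,
                             struct ggml_cgraph * gf_prod, struct ggml_cgraph * gf_cons) {
    ggml_backend_event_t event = ggml_backend_event_new(producer);

    // launch the producer graph asynchronously and record an event after it
    // (return status ignored for brevity)
    ggml_backend_graph_compute_async(producer, gf_prod);
    ggml_backend_event_record(event);

    // make the consumer wait for the producer's work on the device side
    ggml_backend_event_wait(consumer, event);
    ggml_backend_graph_compute_async(consumer, gf_cons);

    // block the host until the recorded event has completed
    ggml_backend_event_synchronize(event);
    ggml_backend_event_free(event);
}

The point of event_wait is that the consumer stalls on its own queue rather than on the host, so the CPU stays free to keep submitting work.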
@@ -163,7 +175,7 @@ extern "C" {
        void (*GGML_CALL ggml_backend_tensor_set)(struct ggml_tensor *, const void *, size_t, size_t);
        bool (*GGML_CALL ggml_is_quantized)(enum ggml_type);
        size_t (*GGML_CALL ggml_type_size)(enum ggml_type);
-        int (*GGML_CALL ggml_blck_size)(enum ggml_type);
+        int64_t (*GGML_CALL ggml_blck_size)(enum ggml_type);
        bool (*GGML_CALL ggml_is_transposed)(const struct ggml_tensor *);
        size_t (*GGML_CALL ggml_nbytes)(const struct ggml_tensor *);
        enum ggml_unary_op (*GGML_CALL ggml_get_unary_op)(const struct ggml_tensor *);
@@ -180,7 +192,11 @@ extern "C" {
        bool (*GGML_CALL ggml_backend_buffer_is_host)(ggml_backend_buffer_t);
        bool (*GGML_CALL ggml_guid_matches)(ggml_guid_t, ggml_guid_t);
        bool (*GGML_CALL ggml_is_empty)(const struct ggml_tensor *);
+        enum ggml_backend_buffer_usage (*GGML_CALL ggml_backend_buffer_get_usage)(ggml_backend_buffer_t);
        bool (*GGML_CALL ggml_are_same_shape)(const struct ggml_tensor *, const struct ggml_tensor *);
+        void (*GGML_CALL ggml_abort)(const char *, int, const char *, ...);
+        bool (*GGML_CALL ggml_is_contiguous_1)(const struct ggml_tensor *);
+        bool (*GGML_CALL ggml_is_contiguous_2)(const struct ggml_tensor *);
    };

#ifdef __cplusplus
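The last two hunks widen ggml_blck_size to int64_t and add new entries to what appears to be a table of ggml entry points handed to an out-of-tree backend. The table's own name and full definition sit outside this diff, so the sketch below uses a hypothetical, cut-down example_ggml_api type; only the member signatures come from the hunks above.

// Sketch only: calling through the extended entry-point table.
// The struct name and how a pointer to it is obtained are hypothetical.
#include <stdbool.h>
#include "ggml.h"

struct example_ggml_api {
    bool (*GGML_CALL ggml_are_same_shape)(const struct ggml_tensor *, const struct ggml_tensor *);
    void (*GGML_CALL ggml_abort)(const char *, int, const char *, ...);
    bool (*GGML_CALL ggml_is_contiguous_1)(const struct ggml_tensor *);
    bool (*GGML_CALL ggml_is_contiguous_2)(const struct ggml_tensor *);
};

static bool example_check_rows(const struct example_ggml_api * api,
                               const struct ggml_tensor * a,
                               const struct ggml_tensor * b) {
    if (!api->ggml_are_same_shape(a, b)) {
        // printf-style abort, matching the new ggml_abort slot
        api->ggml_abort(__FILE__, __LINE__, "shape mismatch: %s vs %s", a->name, b->name);
    }
    // the new contiguity checks allow finer-grained kernel dispatch
    return api->ggml_is_contiguous_1(a) && api->ggml_is_contiguous_2(b);
}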